library(ggplot2)
library(tidyverse)
## Warning: package 'tidyr' was built under R version 4.0.5
## Warning: package 'readr' was built under R version 4.0.5
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.0 ✔ readr 2.1.2
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ lubridate 1.8.0 ✔ tibble 3.1.8
## ✔ purrr 1.0.1 ✔ tidyr 1.2.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(readxl)
library(dplyr)
library(plotly)
##
## Attaching package: 'plotly'
##
## The following object is masked from 'package:ggplot2':
##
## last_plot
##
## The following object is masked from 'package:stats':
##
## filter
##
## The following object is masked from 'package:graphics':
##
## layout
library(sf)
## Warning: package 'sf' was built under R version 4.0.5
## Linking to GEOS 3.9.1, GDAL 3.4.0, PROJ 8.1.1; sf_use_s2() is TRUE
library(leaflet)
# Load the EV registration records and the median household income table.
car_df <- read.csv(file = "EVPopulation.csv")
income_df <- read.csv(file = "Median_Household_Income.csv")
# Count missing values column by column so gaps are visible before analysis.
car_df_na <- vapply(car_df, function(column) sum(is.na(column)), integer(1))
print(car_df_na)
## VIN..1.10.
## 0
## County
## 0
## City
## 0
## State
## 0
## Postal.Code
## 4
## Model.Year
## 0
## Make
## 0
## Model
## 0
## Electric.Vehicle.Type
## 0
## Clean.Alternative.Fuel.Vehicle..CAFV..Eligibility
## 0
## Electric.Range
## 0
## Base.MSRP
## 0
## Legislative.District
## 361
## DOL.Vehicle.ID
## 0
## Vehicle.Location
## 0
## Electric.Utility
## 0
## X2020.Census.Tract
## 4
# Just Washington: keep the rows whose State is "WA".
# which() leaves out rows where the comparison is NA, as subset() does.
wa_data <- car_df[which(car_df$State == "WA"), , drop = FALSE]
# Per-column count of missing values in the income table.
income_df_na <- vapply(income_df, function(column) sum(is.na(column)), integer(1))
print(income_df_na)
## Name Variable Value Year
## 0 0 0 0
## Geography.ID Geography.Name Geography.Type ACS.Year.Estimate
## 0 0 0 0
## Location Date
## 0 0
# Keep only the county-level rows of the income table.
county_income_df <- income_df %>%
  filter(grepl("County", Geography.Type, ignore.case = TRUE))
# Fix the 'Name' variable so it only contains the county name, which is what
# the County column of the vehicle data is compared against.
# Taking just the first word truncates multi-word counties
# ("Walla Walla" -> "Walla", "San Juan" -> "San"), so prefer everything that
# precedes the word "County" and fall back to the first word only when
# "County" does not follow the name.
# NOTE(review): assumes Name starts with the county name — confirm against
# the income file.
name_before_county <- str_extract(county_income_df$Name, "^.+?(?=\\s+County\\b)")
name_first_word <- str_extract(county_income_df$Name, "\\w+")
county_income_df$Name <- ifelse(
  is.na(name_before_county),
  name_first_word,
  name_before_county
)
# Number of rows and columns in the Washington-only data.
dim(wa_data)
# Distinct values of each field of interest, printed for a quick sanity check.
# County
unique_County <- unique(wa_data[["County"]])
print(unique_County)
# City
unique_City <- unique(wa_data[["City"]])
print(unique_City)
# State
unique_State <- unique(wa_data[["State"]])
print(unique_State)
# Zip code
unique_zip <- unique(wa_data[["Postal.Code"]])
print(unique_zip)
# Make
unique_make <- unique(wa_data[["Make"]])
print(unique_make)
# Model year
unique_model_year <- unique(wa_data[["Model.Year"]])
print(unique_model_year)
# Mile range
unique_miles <- unique(wa_data[["Electric.Range"]])
print(unique_miles)
# County check: pull two counties out of the vehicle data.
# which() leaves out rows where the comparison is NA, as subset() does.
Pierce_County <- wa_data[which(wa_data$County == "Pierce"), , drop = FALSE] #12,315
King_County <- wa_data[which(wa_data$County == "King"), , drop = FALSE] #83,413 rows
# Distinct county names present in the income table.
unique_Name <- unique(county_income_df[["Name"]])
print(unique_Name)
# Income rows for Pierce (contains data from 2011 - 2021).
pierce_income_rows <- which(county_income_df$Name == "Pierce")
PierceCounty <- county_income_df[pierce_income_rows, , drop = FALSE]
# Median income: summary statistics of the Value column.
summary(PierceCounty[["Value"]])
Which make and model are the most popular?
# Top ten most popular makes
make_counts <- table(wa_data$Make)
sorted_make_counts <- sort(make_counts, decreasing = TRUE)
# Take at most ten entries: indexing with 1:10 would pad the result with NA
# when fewer than ten makes are present.
top_make_idx <- seq_len(min(10L, length(sorted_make_counts)))
top_ten_make <- names(sorted_make_counts)[top_make_idx]
# Passing a table to data.frame() expands it into Count.Var1 / Count.Freq,
# so the counts are read from Count.Freq below.
top_ten_make_data <- data.frame(
  Make = top_ten_make,
  Count = sorted_make_counts[top_make_idx]
)
ggplot(top_ten_make_data, aes(x = Make, y = Count.Freq, fill = Make)) +
  geom_bar(stat = "identity") +
  labs(title = "Top Ten Most Popular Makes",
       x = "Make",
       y = "Count") +
  theme_minimal() +
  # guides(fill = FALSE) is deprecated since ggplot2 3.3.4; "none" hides the
  # redundant fill legend without the lifecycle warning.
  guides(fill = "none") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
## Warning: The `<scale>` argument of `guides()` cannot be `FALSE`. Use "none" instead as
## of ggplot2 3.3.4.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
# Top ten most popular models
# Frequency of every model, most common first.
model_counts <- table(wa_data[["Model"]])
sorted_model_counts <- sort(model_counts, decreasing = TRUE)
first_ten <- 1:10
top_ten_models <- names(sorted_model_counts)[first_ten]
# The table passed as Count expands into Count.Var1 / Count.Freq columns.
top_ten_data <- data.frame(
  Model = top_ten_models,
  Count = sorted_model_counts[first_ten]
)
# Bar heights come straight from the precomputed counts.
ggplot(data = top_ten_data, mapping = aes(x = Model, y = Count.Freq, fill = Model)) +
  geom_col() +
  labs(x = "Model", y = "Count", title = "Top Ten Most Popular Models") +
  theme_minimal() +
  theme(axis.text.x = element_text(hjust = 1, angle = 45))
# Keep every column for the ten most common models so each model can be
# matched to its make.
model_counts2 <- table(wa_data[["Model"]])
# Reorder the counts from most to least frequent.
sorted_model_counts2 <- model_counts2[order(model_counts2, decreasing = TRUE)]
top_ten_models2 <- names(sorted_model_counts2)[1:10]
filtered_data2 <- filter(wa_data, Model %in% top_ten_models2)
Which counties have the most electric vehicles?
# Top ten counties with the most electric cars
model_county <- table(wa_data$County)
sorted_model_county <- sort(model_county, decreasing = TRUE)
# Take at most ten entries: indexing with 1:10 would pad the result with NA
# when fewer than ten counties are present.
top_county_idx <- seq_len(min(10L, length(sorted_model_county)))
top_county <- names(sorted_model_county)[top_county_idx]
# Passing a table to data.frame() expands it into Count.Var1 / Count.Freq,
# so the counts are read from Count.Freq below.
top_county_data <- data.frame(
  County = top_county,
  Count = sorted_model_county[top_county_idx]
)
ggplot(top_county_data, aes(x = County, y = Count.Freq, fill = County)) +
  geom_bar(stat = "identity") +
  labs(title = "Top Ten Counties with the Most Electric Vehicles",
       x = "County",
       y = "Count") +
  theme_minimal() +
  # guides(fill = FALSE) is deprecated since ggplot2 3.3.4; use "none".
  guides(fill = "none")
Based on how many people live in King County, what is the proportion of people who drive electric cars?
# King County
# Registrations whose County is "King" (rows with an NA comparison excluded).
kingCounty_car <- wa_data[which(wa_data$County == "King"), , drop = FALSE]
# Each row is one registered vehicle, so the row count is the car count.
electric_car_count_king <- nrow(kingCounty_car)
# Total population of King County (hard-coded figure).
total_population_king <- 2252000
# Cars per resident, scaled to a percentage.
proportion_electric_cars_king <-
  (electric_car_count_king / total_population_king) * 100
cat(
  "Percentage of people in King County with an Electric car:",
  proportion_electric_cars_king,
  "%\n"
)
## Percentage of people in King County with an Electric car: 3.703952 %
Based on how many people live in Snohomish County, what is the proportion of people who drive electric cars?
# Snohomish County
# Registrations whose County is "Snohomish"; one row per vehicle.
snohomishCounty_car <- wa_data[which(wa_data$County == "Snohomish"), , drop = FALSE]
electric_car_count_snohomish <- nrow(snohomishCounty_car)
# Hard-coded county population used as the denominator.
total_population_snohomish <- 833540
# Cars per resident, scaled to a percentage.
proportion_electric_cars_snohomish <-
  (electric_car_count_snohomish / total_population_snohomish) * 100
cat(
  "Percentage of people in Snohomish County with an Electric car:",
  proportion_electric_cars_snohomish,
  "%\n"
)
## Percentage of people in Snohomish County with an Electric car: 2.224728 %
Based on how many people live in Pierce County, what is the proportion of people who drive electric cars?
# Pierce County
# Registrations whose County is "Pierce"; one row per vehicle.
pierceCounty_car <- wa_data[which(wa_data$County == "Pierce"), , drop = FALSE]
electric_car_count_pierce <- nrow(pierceCounty_car)
# Hard-coded county population used as the denominator.
total_population_pierce <- 925708
# Cars per resident, scaled to a percentage.
proportion_electric_cars_pierce <-
  (electric_car_count_pierce / total_population_pierce) * 100
cat(
  "Percentage of people in Pierce County with an Electric car:",
  proportion_electric_cars_pierce,
  "%\n"
)
## Percentage of people in Pierce County with an Electric car: 1.330333 %
Based on how many people live in WA, what is the proportion of people who drive electric cars?
# WA
# Every row of wa_data is one registered electric vehicle in the state.
electric_car_count_wa <- nrow(wa_data)
# Hard-coded state population used as the denominator.
total_population_wa <- 7739000
# Cars per resident, scaled to a percentage.
proportion_electric_cars_wa <-
  (electric_car_count_wa / total_population_wa) * 100
cat(
  "Percentage of people in all of WA with an Electric car:",
  proportion_electric_cars_wa,
  "%\n"
)
## Percentage of people in all of WA with an Electric car: 2.055899 %
Based on how many cars are registered in WA, what is the proportion of electric cars?
# WA
# Every row of wa_data is one registered electric vehicle in the state.
electric_car_count_wa <- nrow(wa_data)
# Total CAR population of WA (hard-coded figure).
# NOTE: total_population_wa and proportion_electric_cars_wa are reused here,
# so from this point on they hold vehicle-based values, not resident-based.
total_population_wa <- 7966147
# Electric cars per registered car, scaled to a percentage.
proportion_electric_cars_wa <-
  (electric_car_count_wa / total_population_wa) * 100
cat(
  "Percentage of EVs out of all total registered cars:",
  proportion_electric_cars_wa,
  "%\n"
)
## Percentage of EVs out of all total registered cars: 1.997277 %
Interactive plot that shows median income by county, with number of cars in 2021.
# List of top ten counties
selected_counties <- c(
  "King", "Snohomish", "Pierce", "Clark", "Thurston",
  "Kitsap", "Spokane", "Whatcom", "Benton", "Skagit"
)
# Registrations restricted to the selected counties.
filtered_wa_data <- filter(wa_data, County %in% selected_counties)
# One row per county holding its number of registrations.
electric_vehicle_summary <- summarise(
  group_by(filtered_wa_data, County),
  electric_vehicle_count = n()
)
# Income rows for the selected counties, Year 2021 only.
filtered_income_df <- filter(
  county_income_df,
  Name %in% selected_counties,
  Year == 2021
)
# Attach the income columns to the per-county counts; County pairs with Name.
merged_data <- left_join(
  electric_vehicle_summary,
  filtered_income_df,
  by = c("County" = "Name")
)
#ordered_data$County <- factor(ordered_data$County, levels = ordered_data$County)
# Interactive bar chart: bar height and colour both follow Value; the hover
# text shows the EV count and the income figure.
# NOTE(review): 'heat' may not be a colorscale name plotly recognises — verify.
plot <- plot_ly(
  data = merged_data,
  x = ~County,
  y = ~Value,
  type = 'bar',
  marker = list(color = ~Value, colorscale = 'heat'),
  text = ~paste("# of Electric Vehicles: ", electric_vehicle_count,
                "<br>Median Income: $", Value)
) %>%
  layout(
    title = 'Income by County & Electric Car Count (2021)',
    xaxis = list(title = 'Top Ten counties with the most Electric Cars'),
    yaxis = list(title = 'Median Income')
  )
plot
# Possibly not needed: correlation between the per-county EV count and Value.
# The matrix is stored but not printed here.
correlation_data <- select(merged_data, electric_vehicle_count, Value)
correlation_matrix <- cor(x = correlation_data)
For the five most popular makes, how does electric range vary across model years?
# Electric range by model year for five selected makes.
selected_makes <- c("TESLA", "NISSAN", "CHEVROLET", "FORD", "BMW")
selected_make_data <- filter(wa_data, Make %in% selected_makes)
# Interactive scatter plot: one marker per registration, coloured by make,
# with the model name shown on hover. Titles and legend are set in layout().
plot4 <- plot_ly(
  data = selected_make_data,
  x = ~Model.Year,
  y = ~Electric.Range,
  color = ~Make,
  text = ~Model,
  type = 'scatter',
  mode = 'markers',
  hoverinfo = "text"
) %>%
  layout(
    title = 'Electric Range vs Model Year by Make',
    xaxis = list(title = 'Model Year'),
    yaxis = list(title = 'Electric Range (miles)'),
    showlegend = TRUE
  )
plot4
Create a map of all of the Nissan cars in Pierce County
# Filter data for NISSAN cars in Pierce County.
# which() keeps only rows where both comparisons are TRUE, so an NA in Make or
# County cannot introduce all-NA rows.
nissan_rows <- which(wa_data$Make == "NISSAN" & wa_data$County == "Pierce")
nissan_data <- wa_data[nissan_rows, , drop = FALSE] #998 cars
# Vehicle.Location is text of the form "POINT (x y)". Strip the wrapper and
# split on the space. The first value is the x coordinate (longitude) and the
# second is the y coordinate (latitude); previously the two columns were
# named the wrong way round and then passed swapped to addMarkers().
nissan_data$Vehicle.Location <- gsub("POINT \\((.*)\\)", "\\1", nissan_data$Vehicle.Location)
coordinates <- str_split_fixed(nissan_data$Vehicle.Location, " ", 2)
nissan_data$Latitude <- as.numeric(coordinates[, 2])
nissan_data$Longitude <- as.numeric(coordinates[, 1])
#nissan_data #now has lon and lat
#head(nissan_data)
# Create a leaflet map with the default tile layer.
car_map <- leaflet(data = nissan_data) %>%
  addTiles()
# Add a marker for each car location; the popup lists model, year, city and
# electric range.
car_map <- car_map %>%
  addMarkers(lng = ~Longitude, lat = ~Latitude, popup = ~paste(
    "Car Model: ", Model, "<br>",
    "Year Made: ", Model.Year, "<br>",
    "City: ", City, "<br>",
    "Electric Range: ", Electric.Range, " miles"))
car_map